/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				3/18/2018
Date last modified: 		
Purpose: 
	   -Use ASM/NBER expenditure and shipment data to calculate
		industry-year level energy intensities
	   -Use enery prices to convert expenditure into use amount [Btu]
		
		Steps:
			I. import and merge
				1. ASM shipment and expenditure
				2. NBER shipment and expenditure
				3. energy prices
			II.  generate variables
				1. expenditure
				2. intensities
==========================================================================*/


* Intensities = ASM or NBER expenditure / ASM or NBER shipment


*************************************************************************
* 				PART A. import data										*
*************************************************************************

* 1. price
use "$energy_price/energy_price_industry_year.dta", clear

* 2. NBER
preserve
	use "$buildpath/input/shipments/NBER_shipments_naics1997_1958-2011.dta", clear
	keep naics year energy vship
	drop if energy == .
	rename energy cost_fuel_elec_nber
	rename vship shipment_nber
	
	keep cost_fuel_elec_nber shipment_nber naics year
	collapse (sum) cost_fuel_elec_nber shipment_nber, by(naics year)
	label var cost_fuel_elec_nber "Cost of electric & fuels in $1m"
	label var shipment "Total value of shipments in $1m"
	
	keep if year >= 1990
	tempfile nber
	save `nber.dta', replace
restore

merge m:1 naics year using `nber.dta'
drop if _merge == 2
drop _merge


* 3. ASM
preserve
	use "$ASM/ASM.dta", clear
	keep if level == 6 | naicsid_str == "31131" | naicsid_str == "31181"
	
	keep naicsid naicsid_str naicsid_5digit code naicsdisplaylabel year ///
		cst_fuel cst_elec q_elec_pch q_elec_gen q_elec_sld shipment

	replace code = "correct" if code == "6-digit" | naicsid_str == "31131" | naicsid_str == "31181"
	drop naicsid 
	rename naicsid_str naics_asm
	tempfile asm
	save `asm.dta', replace
restore

* construct asm merge key
do "$buildpath/code/subfunctions/genASMmergekey.do"

* merge
merge m:1 naics_asm year using `asm.dta'
drop if _merge == 2
drop _merge

* 4. MECS at NAICS6 level
preserve
	use "$MECSenergy/MECS.dta", clear

	* to keep at NAICS6 level as much as possible 
	keep if level == 6
	rename level level_mecs

	* drop 1 obs in 2014 (other sector)
	drop if naics == 98

	* rename variables
	drop order naicsid_str
	rename naics naics_mecs
	rename industry industry_name 

	* save the file
	keep naics_mecs year firstuse_fuel_tot_2
	label var firstuse_fuel_tot_2 "Trillion Btu, from MECS"
	tempfile MECS_naics6
	save `MECS_naics6.dta', replace

restore

capture drop naics_mecs
gen naics_mecs = naics2007 if year <= 2010
replace naics_mecs = naics2012 if year > 2010

merge m:1 naics_mecs year using `MECS_naics6.dta'
drop _merge 

* 5. clean variables and set the measurement unit consistent
preserve
	keep naics_asm naics
	duplicates drop
	bysort naics_asm: gen N = _N
	
	tempfile numberofsubindustries
	save `numberofsubindustries.dta', replace
restore

merge m:1 naics_asm naics using `numberofsubindustries.dta'
drop _merge
label var N  "number of NAICS6 industries within this NAICS_asm "

* shipment
rename shipment shipment_asm
replace shipment_asm = shipment_asm/1000
label var shipment_asm "Total value of shipments in $1m"

* amount of energy use in Btu
gen usage_elec_asm = q_elec_pch * 3.412
label var usage_elec_asm "Quantity of electricity purchased for heat and power million Btu"
gen usage_mecs = firstuse_fuel_tot_2*1000000
label var usage_mecs "first use fuel, million Btu, from MECS (for selected industries only)"

* energy expenditures in $
replace cst_fuel = cst_fuel/1000
label var cst_fuel "Purchased fuels ($1m)"
replace cst_elec = cst_elec/1000
label var cst_elec "Purchased electricity ($1m)"
rename cst_fuel cost_fuel_asm
rename cst_elec cost_elec_asm

*
drop if naics == 31131 | naics == 31181 | naics == 339111

* other variables
keep N year naics2007 naics2012 naics industryname* year ///
		price_TIV price_fuel_TIV price_TV price_fuel_TV price_elec ///
	    cost_fuel_elec_nber shipment_nber cost_fuel_asm cost_elec_asm shipment_asm usage_elec_asm usage_mecs

order cost_fuel_asm cost_elec_asm usage_elec_asm, a(cost_fuel_elec_nber)
order usage_elec_asm usage_mecs, a(shipment_asm)


*************************************************************************
* 	Save a version of prices and energy intensities at NAICS2007 level  *
*************************************************************************
preserve

	keep if year == 2007 | year == 2006
	* (keep MECS data from 2006, will rescale by 2007 dollar)
	
	* generate energy use amount
	gen usage_asm = usage_elec_asm + cost_fuel_asm * 1000000/price_fuel_TV
	label var usage_asm "Quantity of electric & fuels in 1 million Btu"
		gen usage_elec_asm_compare = cost_elec_asm * 1000000/price_elec
		label var usage_elec_asm_compare "Quantity of electric in 1 million Btu"
		sum usage_elec_asm_compare usage_elec_asm
		drop usage_elec_asm_compare
	
	gen usage_nber = cost_fuel_elec_nber*1000000/price_TV
	label var usage_nber "Quantity of electric & fuels in 1 million Btu"

	* generate intensities
	gen intensity_asm = usage_asm / shipment_asm
	label var intensity_asm "million Btu / $1m shipment"

	gen intensity_nber = usage_nber / shipment_nber
	label var intensity_nber "million Btu / $1m shipment"
	
	gen intensity_mecs = usage_mecs / (shipment_nber /.97384644)
	label var intensity_mecs "million Btu / $1m shipment"
	* .97384644 : 2006 USD to 2007 USD
	
	gen intensity_asm_2 = (cost_fuel_asm + cost_elec_asm)  /shipment_asm
	label var intensity_asm_2 "$ expenditure / $ shipment"

	gen intensity_nber_2 = cost_fuel_elec_nber /shipment_nber
	label var intensity_nber_2 "$ expenditure / $ shipment"

	keep intensity_nber* intensity_asm* intensity_mecs usage_mecs usage_asm usage_nber naics naics2012  N year industryname2007
	order naics  
	label var naics "naics2007"
	
	* keep 2007 data only
	sort naics year
	bysort naics: replace usage_mecs = usage_mecs[1]
	bysort naics: replace intensity_mecs = intensity_mecs[1]
	keep if year == 2007
	
	* generate final intensity as a combinatin of ASM and NBER intensities
	gen intensity = intensity_mecs
	replace intensity = intensity_asm if intensity == . & N == 1
	replace intensity = intensity_nber if intensity == . | intensity == 0
	label var intensity "million Btu/ $1m shipment"

	gen intensity_2 = intensity_asm_2
	replace intensity_2 = intensity_nber_2 if N > 1 | intensity_asm_2 == . | intensity_asm_2 == 0
	label var intensity_2 "$ expenditure / $ shipment"

	order intensity intensity_2, b(N)
	order usage_mecs, b(usage_asm)
	
	save "$EI/energy_intensity_2007_naics2007.dta", replace
restore


*************************************************************************
* 				PART B. generate intensities at 2012 NAICS level		*
*************************************************************************

* Note:
* some ASM industries are acutally at NAICS5-6 level but our observations are at NAICS6 level.
* Fortunately, the ASM shipment will be at the same level.
* So I won't do any rescale of these industries. And each industry will be assigned an weighted average intensity.

* 1. deflate the shipments
* merge in deflator
merge m:1 year using "$buildpath/output/deflator/deflator_year.dta", keep(1 3) nogen

* merge in industry-specific deflator
rename naics naics_1
gen naics = naics2007
replace naics = naics2012 if year >= 2010
merge m:1 naics year using "$buildpath/output/deflator/deflator_industry_year.dta"
drop if _merge == 2
drop _merge
drop naics
rename naics_1 naics

* deflate
foreach var in nber asm {
	gen shipment_`var'_2 		= shipment_`var' / ppidef_ipol
	label var shipment_`var'_2 	"Total value of shipments in $1m, ppidef_ipol"
	gen shipment_`var'_1 		= shipment_`var' / gdpdef
	label var shipment_`var'_1 	"Total value of shipments in $1m, gdpdef"
}


* 2. collapse to 2012 level
* from 2012+, ASM data are already at NAICS2012 level. ( will do (sum) pre2011ASMvars (mean) post2012ASMvars )
* for year 2014+, MECS data are at NAICS2012 level. ( will do (sum) 1998-2010MECSvars (mean) 2014MECSvars )
gen 	shipment_asm_post2012 = .
replace shipment_asm_post2012 = shipment_asm if year >= 2012
gen 	shipment_asm_1_post2012 = .
replace shipment_asm_1_post2012 = shipment_asm_1 if year >= 2012
gen 	shipment_asm_2_post2012 = .
replace shipment_asm_2_post2012 = shipment_asm_2 if year >= 2012
gen 	cost_fuel_asm_post2012 = .
replace cost_fuel_asm_post2012 = cost_fuel_asm if year >= 2012
gen 	cost_elec_asm_post2012 = .
replace cost_elec_asm_post2012 = cost_elec_asm if year >= 2012
gen 	usage_elec_asm_post2012 = .
replace usage_elec_asm_post2012 = usage_elec_asm if  year >= 2012
gen 	usage_mecs_2014 = .
replace usage_mecs_2014 = usage_mecs if  year >= 2012

* record the missings
foreach var in cost_fuel_elec_nber shipment_nber shipment_nber_1 shipment_nber_2 cost_fuel_asm cost_elec_asm shipment_asm shipment_asm_1 shipment_asm_2 usage_elec_asm usage_mecs {
	bysort naics2012 year (`var') : gen am_`var' = mi(`var'[1])
}
collapse (sum) cost_fuel_elec_nber shipment_nber shipment_nber_1 shipment_nber_2 cost_fuel_asm cost_elec_asm shipment_asm shipment_asm_1 shipment_asm_2 usage_elec_asm usage_mecs ///
		(mean) N price_TIV price_fuel_TIV price_TV price_fuel_TV price_elec ///
		(first) shipment_asm_post2012 shipment_asm_1_post2012 shipment_asm_2_post2012 cost_fuel_asm_post2012 cost_elec_asm_post2012 usage_elec_asm_post2012 usage_mecs_2014 ///
		(min) am*   , by(naics2012 industryname2012 year)

foreach var in cost_fuel_elec_nber shipment_nber shipment_nber_1 shipment_nber_2 cost_fuel_asm cost_elec_asm shipment_asm shipment_asm_1 shipment_asm_2 usage_elec_asm usage_mecs {
	replace `var' = . if am_`var' == 1 | `var' == 0
}
drop am*
foreach var in shipment_asm shipment_asm_1 shipment_asm_2 cost_fuel_asm cost_elec_asm usage_elec_asm {
	replace `var' = `var'_post2012 if year >=2012
	drop `var'_post2012
}
replace usage_mecs = usage_mecs_2014 if year == 2014
drop usage_mecs_2014
*

* 2. generate energy use amount
gen usage_asm = usage_elec_asm + cost_fuel_asm * 1000000/price_fuel_TV
label var usage_asm "Quantity of electric & fuels in 1 million Btu"
	gen usage_elec_asm_compare = cost_elec_asm * 1000000/price_elec
	label var usage_elec_asm_compare "Quantity of electric in 1 million Btu"
	sum usage_elec_asm_compare usage_elec_asm
	drop usage_elec_asm_compare
	
gen usage_nber = cost_fuel_elec_nber*1000000/price_TV
label var usage_nber "Quantity of electric & fuels in 1 million Btu"

* 3. generate intensities
gen intensity_asm = usage_asm / shipment_asm_1
label var intensity_asm "million Btu / $1m shipment, gdpdef"
gen intensity_asm_ppidef = usage_asm / shipment_asm_2
label var intensity_asm_ppidef "million Btu / $1m shipment, ppidef_ipol"

gen intensity_nber = usage_nber / shipment_nber_1
label var intensity_nber "million Btu / $1m shipment, gdpdef"
gen intensity_nber_ppidef = usage_nber / shipment_nber_2
label var intensity_nber_ppidef "million Btu / $1m shipment, ppidef_ipol"

gen shipment_1 = shipment_nber_1
replace shipment_1 = shipment_asm_1 if year >= 2012 & N == 1 & shipment_asm_1 !=.
gen intensity_mecs = usage_mecs / shipment_1
label var intensity_mecs "million Btu / $1m shipment, gdpdef"

gen shipment_2 = shipment_nber_2
replace shipment_2 = shipment_asm_2 if year >= 2012 & N == 1 & shipment_asm_2 !=.
gen intensity_mecs_ppidef = usage_mecs / shipment_2
label var intensity_mecs_ppidef "million Btu / $1m shipment, ppidef_ipol"

	
gen intensity_asm_2 = (cost_fuel_asm + cost_elec_asm)  /shipment_asm
label var intensity_asm_2 "$ expenditure / $ shipment"

gen intensity_nber_2 = cost_fuel_elec_nber /shipment_nber
label var intensity_nber_2 "$ expenditure / $ shipment"

*************************************************************************
* 				PART C. save											*
*************************************************************************

keep intensity_nber* intensity_asm* intensity_me* naics  N year industryname2012
order naics  

************************************
* keep a 2007 version
************************************
preserve
	drop *ppidef
	* generate final intensity as a combinatin of MECS ASM and NBER intensities
	gen intensity_old = intensity_asm 
	replace intensity_old = intensity_nber if N > 1 | intensity_asm == . | intensity_asm == 0
	label var intensity_old "million Btu/ $1m shipment, using only ASM & NBER"

	gen intensity = intensity_mecs
	replace intensity = intensity_asm if intensity == . & N == 1
	replace intensity = intensity_nber if intensity == . | intensity == 0
	label var intensity "million Btu/ $1m shipment, using MECS, ASM, NBER"
	
	gen intensity_2 = intensity_asm_2
	replace intensity_2 = intensity_nber_2 if N > 1 | intensity_asm_2 == . | intensity_asm_2 == 0
	label var intensity_2 "$ expenditure / $ shipment"

	order intensity intensity_old intensity_2, b(N)


	keep if year == 2007 | year == 2006
	sort naics year
	bysort naics: replace intensity_mecs = intensity_mecs[1]
	keep if year == 2007
	
	drop intensity
	gen intensity = intensity_mecs
	replace intensity = intensity_asm if intensity == . & N == 1
	replace intensity = intensity_nber if intensity == . | intensity == 0
	label var intensity "million Btu/ $1m shipment, using MECS, ASM, NBER"
	order intensity, b(intensity_old)
	drop year

save "$EI/energy_intensity_2007.dta", replace

restore


************************************************************************
* year-variant version
************************************************************************

* 1. interpolate to fillin the holes in MECS data **********************

************************ 1.1 intensity_mecs
bysort naics: egen n = total(mi(intensity_mecs))
replace n = 27-n
label var n "number of MECS data points in this industry"
bysort naics2012: ipolate intensity_mecs year, gen(intensity_mecs_ipol) 

* take the first nonmissing and last nonmissing values
bysort naics2012 (year): gen countnonmissing = sum(!missing(intensity_mecs_ipol)) if !missing(intensity_mecs_ipol)
bysort naics2012 (countnonmissing) : gen firstnonmissing = intensity_mecs_ipol[1]
replace countnonmissing = - countnonmissing 
bysort naics2012 (countnonmissing) : gen lastnonmissing = intensity_mecs_ipol[1]
sort naics2012 year

* separate the observations in 3 groups: 
* (1) beofore the first nonmissing (2) between first and last nonmissings (3) after the last nonmissing
* for group 2, we already interpolate it
* for group 1, fill in the first nonmissing value
* for group 3, fill in the last nonmissing value
replace countnonmissing = 1 if countnonmissing != .
replace countnonmissing = 0 if countnonmissing == .
bysort naics2012 (year) : gen nonmissing = countnonmissing[_n] - countnonmissing[_n-1]
replace nonmissing = 2 if nonmissing == -1
bysort naics2012 (year) : gen group = sum(nonmissing)
drop nonmissing countnonmissing
replace group = . if n == 0

replace intensity_mecs_ipol = firstnonmissing if group == 0 & n != 0
replace intensity_mecs_ipol = lastnonmissing if group == 3 & n != 0

drop group firstnonmissing lastnonmissing n

************************ 1.2 intensity_mecs_ppidef
bysort naics: egen n = total(mi(intensity_mecs_ppidef))
replace n = 27-n
label var n "number of MECS data points in this industry"
bysort naics2012: ipolate intensity_mecs_ppidef year, gen(intensity_mecs_ppidef_ipol) 

* take the first nonmissing and last nonmissing values
bysort naics2012 (year): gen countnonmissing = sum(!missing(intensity_mecs_ppidef_ipol)) if !missing(intensity_mecs_ppidef_ipol)
bysort naics2012 (countnonmissing) : gen firstnonmissing = intensity_mecs_ppidef_ipol[1]
replace countnonmissing = - countnonmissing 
bysort naics2012 (countnonmissing) : gen lastnonmissing = intensity_mecs_ppidef_ipol[1]
sort naics2012 year

* separate the observations in 3 groups: 
* (1) beofore the first nonmissing (2) between first and last nonmissings (3) after the last nonmissing
* for group 2, we already interpolate it
* for group 1, fill in the first nonmissing value
* for group 3, fill in the last nonmissing value
replace countnonmissing = 1 if countnonmissing != .
replace countnonmissing = 0 if countnonmissing == .
bysort naics2012 (year) : gen nonmissing = countnonmissing[_n] - countnonmissing[_n-1]
replace nonmissing = 2 if nonmissing == -1
bysort naics2012 (year) : gen group = sum(nonmissing)
drop nonmissing countnonmissing
replace group = . if n == 0

replace intensity_mecs_ppidef_ipol = firstnonmissing if group == 0 & n != 0
replace intensity_mecs_ppidef_ipol = lastnonmissing if group == 3 & n != 0

drop group firstnonmissing lastnonmissing n

order intensity_mecs_ipol intensity_mecs_ppidef_ipol, a(intensity_mecs_ppidef)



* 2. generate variables ************************************************
* generate final intensity as a combinatin of MECS ASM and NBER intensities
	gen 	intensity = intensity_mecs_ipol
	replace intensity = intensity_asm 		if intensity == . & N == 1
	replace intensity = intensity_nber 		if intensity == . | intensity == 0
	label var intensity "million Btu/ $1m shipment, gdpdef, using MECS, ASM, NBER"
	
	gen 	intensity_ppidef = intensity_mecs_ppidef_ipol
	replace intensity_ppidef = intensity_asm_ppidef 		if intensity_ppidef == . & N == 1
	replace intensity_ppidef = intensity_nber_ppidef 		if intensity_ppidef == . | intensity_ppidef == 0
	label var intensity_ppidef "million Btu/ $1m shipment, ppidef, using MECS, ASM, NBER"
	
	gen intensity_2 = intensity_asm_2
	replace intensity_2 = intensity_nber_2 if N > 1 | intensity_asm_2 == . | intensity_asm_2 == 0
	label var intensity_2 "$ expenditure / $ shipment"

	order intensity intensity_ppidef intensity_2, b(N)

* interpolate intensity to fillin holes from ASM/NBER
sort naics2012 year
bysort naics2012: ipolate intensity year, gen(intensity_ipol) 
bysort naics: egen n = total(mi(intensity_ipol))
replace n = 27-n

* take the first nonmissing and last nonmissing values
bysort naics2012 (year): gen countnonmissing = sum(!missing(intensity_ipol)) if !missing(intensity_ipol)
bysort naics2012 (countnonmissing) : gen firstnonmissing = intensity_ipol[1]
replace countnonmissing = - countnonmissing 
bysort naics2012 (countnonmissing) : gen lastnonmissing = intensity_ipol[1]
sort naics2012 year

* separate the observations in 3 groups: 
* (1) beofore the first nonmissing (2) between first and last nonmissings (3) after the last nonmissing
* for group 2, we already interpolate it
* for group 1, fill in the first nonmissing value
* for group 3, fill in the last nonmissing value
replace countnonmissing = 1 if countnonmissing != .
replace countnonmissing = 0 if countnonmissing == .
bysort naics2012 (year) : gen nonmissing = countnonmissing[_n] - countnonmissing[_n-1]
replace nonmissing = 2 if nonmissing == -1
bysort naics2012 (year) : gen group = sum(nonmissing)
drop nonmissing countnonmissing
replace group = . if n == 0 | n == 27

replace intensity_ipol = firstnonmissing if group == 0 & n != 0
replace intensity_ipol = lastnonmissing if (group == 2 | group ==3) & n != 0

drop group firstnonmissing lastnonmissing n
replace intensity = intensity_ipol
drop intensity_ipol


* save
sort naics2012 year
local v "$version"
save "$EI/energy_intensity_industry_year.dta",  replace

